{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "-G6H6HsRTFLn",
   "metadata": {
    "id": "-G6H6HsRTFLn"
   },
   "source": [
    "<a target=\"_blank\" href=\"https://colab.research.google.com/github/AI4Finance-Foundation/FinRL-Tutorials/blob/master/1-Introduction/China_A_share_market_tushare.ipynb\">\n",
    "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
    "</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ShYRMdBTFLp",
   "metadata": {
    "id": "3ShYRMdBTFLp"
   },
   "source": [
    "## Quantitative trading in the China A-share market with FinRL"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pBU3DdPFTFLp",
   "metadata": {
    "id": "pBU3DdPFTFLp"
   },
   "source": [
    "Install FinRL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51W37k2_TFLq",
   "metadata": {
    "id": "51W37k2_TFLq"
   },
   "outputs": [],
   "source": [
    "# Python-level prerequisites.\n",
    "!pip install wrds\n",
    "!pip install swig\n",
    "\n",
    "# Bootstrap conda inside the Colab runtime.\n",
    "# NOTE(review): condacolab.install() is expected to restart the kernel - confirm before relying on Run All.\n",
    "!pip install -q condacolab\n",
    "import condacolab\n",
    "condacolab.install()\n",
    "\n",
    "# System packages via apt, then FinRL itself straight from its GitHub repository.\n",
    "!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig\n",
    "!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ZmuaPTCTFLr",
   "metadata": {
    "id": "9ZmuaPTCTFLr"
   },
   "source": [
    "Install other libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "q6T3o9yTTFLr",
   "metadata": {
    "id": "q6T3o9yTTFLr"
   },
   "outputs": [],
   "source": [
    "!pip install stockstats\n",
    "!pip install tushare\n",
    "\n",
    "# Build the TA-Lib C library from source, then install its Python wrapper.\n",
    "# -nc skips the download when the tarball is already present (safe to re-run).\n",
    "!wget -nc https://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz\n",
    "!tar xvzf ta-lib-0.4.0-src.tar.gz\n",
    "import os\n",
    "\n",
    "# Run the whole build in one subshell: the kernel's working directory is never\n",
    "# changed, and `&&` stops at the first failing step instead of carrying on.\n",
    "# Prefix `make install` with sudo if the environment is not already running as root.\n",
    "!cd ta-lib && ./configure --prefix=/usr && make && make install\n",
    "!pip install TA-Lib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "DrReji1OTFLr",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "DrReji1OTFLr",
    "outputId": "325c38e3-ca71-4b58-e0be-104e15011fe2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/\n",
      "fatal: destination path 'FinRL-Meta' already exists and is not an empty directory.\n",
      "/FinRL-Meta\n"
     ]
    }
   ],
   "source": [
    "%cd /\n",
    "# Clone only when the checkout is missing, so re-running this cell does not hit\n",
    "# git's \"destination path already exists\" error.\n",
    "!test -d FinRL-Meta || git clone https://github.com/AI4Finance-Foundation/FinRL-Meta\n",
    "%cd /FinRL-Meta/"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "C-MYxgpJTMGP",
   "metadata": {
    "id": "C-MYxgpJTMGP"
   },
   "source": [
    "## Import Modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "Vx_hcZwgTKQp",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Vx_hcZwgTKQp",
    "outputId": "d6b36801-3064-4251-aadd-2396cb03ad5d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ALL Modules have been imported!\n"
     ]
    }
   ],
   "source": [
    "import warnings\n",
    "\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "import pandas as pd \n",
    "from IPython import display\n",
    "\n",
    "display.set_matplotlib_formats(\"svg\")\n",
    "\n",
    "from meta import config \n",
    "from meta.data_processor import DataProcessor \n",
    "from main import check_and_make_directories \n",
    "from meta.data_processors.tushare import Tushare, ReturnPlotter \n",
    "from meta.env_stock_trading.env_stocktrading_China_A_shares import StockTradingEnv \n",
    "from agents.stablebaselines3_models import DRLAgent \n",
    "import os \n",
    "from typing import List \n",
    "from argparse import ArgumentParser \n",
    "from meta import config \n",
    "from meta.config_tickers import DOW_30_TICKER \n",
    "from meta.config import ( DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR, INDICATORS, TRAIN_START_DATE, TRAIN_END_DATE, TEST_START_DATE, TEST_END_DATE, TRADE_START_DATE, TRADE_END_DATE, ERL_PARAMS, RLlib_PARAMS, SAC_PARAMS, ALPACA_API_KEY, ALPACA_API_SECRET, ALPACA_API_BASE_URL, )\n",
    "\n",
    "import pyfolio\n",
    "from pyfolio import timeseries\n",
    "\n",
    "pd.options.display.max_columns = None\n",
    "\n",
    "print(\"ALL Modules have been imported!\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "FRQz2ptSTjPJ",
   "metadata": {
    "id": "FRQz2ptSTjPJ"
   },
   "source": [
    "## Create Folders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pmttRZWWTXcd",
   "metadata": {
    "id": "pmttRZWWTXcd"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# check_and_make_directories() stands in for a manual\n",
    "# `if not os.path.exists(d): os.makedirs(d)` check per output directory.\n",
    "check_and_make_directories(\n",
    "    [DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR]\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "94s2JtmxTuLq",
   "metadata": {
    "id": "94s2JtmxTuLq"
   },
   "source": [
    "## Data download, cleaning, and feature engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "xpPTz-xDTovy",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "xpPTz-xDTovy",
    "outputId": "40df5f90-6211-452c-ee63-2dc2c849b370"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tushare successfully connected\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "# Shanghai-listed (.SH) tickers used throughout this notebook.\n",
    "ticker_list = ['600000.SH', '600009.SH', '600016.SH', '600028.SH', '600030.SH', '600031.SH', '600036.SH', '600050.SH', '600104.SH', '600196.SH', '600276.SH', '600309.SH', '600519.SH', '600547.SH', '600570.SH']\n",
    "\n",
    "# These assignments override the same-named defaults imported from meta.config.\n",
    "TRAIN_START_DATE = '2015-01-01'\n",
    "TRAIN_END_DATE = '2019-08-01'\n",
    "TRADE_START_DATE = '2019-08-01'\n",
    "TRADE_END_DATE = '2020-01-03'\n",
    "\n",
    "TIME_INTERVAL = \"1d\"\n",
    "\n",
    "# Never commit API tokens to the notebook: read the token from the\n",
    "# TUSHARE_TOKEN environment variable, or prompt for it without echoing.\n",
    "tushare_token = os.environ.get(\"TUSHARE_TOKEN\") or getpass(\"Tushare API token: \")\n",
    "kwargs = {\"token\": tushare_token}\n",
    "\n",
    "# One processor spans the whole train + trade window; the split happens later.\n",
    "p = DataProcessor(data_source='tushare', start_date=TRAIN_START_DATE, end_date=TRADE_END_DATE, time_interval=TIME_INTERVAL, **kwargs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "svZh2OT0T7PG",
   "metadata": {
    "id": "svZh2OT0T7PG"
   },
   "source": [
    "### Download and Clean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "v_PzruLIT3D1",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "v_PzruLIT3D1",
    "outputId": "fa4b9030-f8ff-41a3-abef-77be4f9d37ce"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 15/15 [00:07<00:00,  1.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Download complete! Dataset saved to ./data/dataset.csv. \n",
      "Shape of DataFrame: (17960, 8)\n",
      "Shape of DataFrame:  (18315, 8)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Fetch the raw bars for every ticker, then run the processor's\n",
    "# clean_data() and fillna() passes on the downloaded frame.\n",
    "p.download_data(ticker_list=ticker_list)\n",
    "p.clean_data()\n",
    "p.fillna()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "tsHu-XT_T_vQ",
   "metadata": {
    "id": "tsHu-XT_T_vQ"
   },
   "source": [
    "### Add technical indicators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "VfniyyQQT3nq",
   "metadata": {
    "id": "VfniyyQQT3nq"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tech_indicator_list:  ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']\n",
      "indicator:  macd\n",
      "indicator:  boll_ub\n",
      "indicator:  boll_lb\n",
      "indicator:  rsi_30\n",
      "indicator:  cci_30\n",
      "indicator:  dx_30\n",
      "indicator:  close_30_sma\n",
      "indicator:  close_60_sma\n",
      "Succesfully add technical indicators\n",
      "Shape of DataFrame:  (18270, 17)\n"
     ]
    }
   ],
   "source": [
    "# Compute the indicators listed in config.INDICATORS, then run fillna() again.\n",
    "p.add_technical_indicator(config.INDICATORS)\n",
    "p.fillna()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "cKZk3jGuUR34",
   "metadata": {
    "id": "cKZk3jGuUR34"
   },
   "source": [
    "## Split training dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "SuKbrwflUVeU",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "SuKbrwflUVeU",
    "outputId": "7596367b-670d-4d6c-b439-033075d87589"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "len(train.tic.unique()): 15\n"
     ]
    }
   ],
   "source": [
    "# Carve the training window out of the processed frame.\n",
    "train = p.data_split(p.dataframe, TRAIN_START_DATE, TRAIN_END_DATE)\n",
    "\n",
    "print(f\"len(train.tic.unique()): {len(train.tic.unique())}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5ONAnSMBUWyu",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "5ONAnSMBUWyu",
    "outputId": "5bdf45d0-7689-4d31-dfa6-cbcbe8e64827"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.tic.unique(): ['600000.SH' '600009.SH' '600016.SH' '600028.SH' '600030.SH' '600031.SH'\n",
      " '600036.SH' '600050.SH' '600104.SH' '600196.SH' '600276.SH' '600309.SH'\n",
      " '600519.SH' '600547.SH' '600570.SH']\n"
     ]
    }
   ],
   "source": [
    "print(f\"train.tic.unique(): {train.tic.unique()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "BXF8hYDvUXfv",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "BXF8hYDvUXfv",
    "outputId": "a08ebe19-0107-4e31-c6df-816c846aa3f2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.head():          tic        time  index   open   high    low  close  adjusted_close  \\\n",
      "0  600000.SH  2015-01-08     45  15.87  15.88  15.20  15.25           15.25   \n",
      "0  600009.SH  2015-01-08     46  20.18  20.18  19.73  20.00           20.00   \n",
      "0  600016.SH  2015-01-08     47  10.61  10.66  10.09  10.20           10.20   \n",
      "0  600028.SH  2015-01-08     48   7.09   7.41   6.83   6.85            6.85   \n",
      "0  600030.SH  2015-01-08     49  36.40  36.70  34.68  35.25           35.25   \n",
      "\n",
      "       volume      macd    boll_ub    boll_lb     rsi_30      cci_30  \\\n",
      "0  3306271.72 -0.032571  16.617911  15.012089   6.058641 -125.593009   \n",
      "0   198117.45 -0.016008  20.663897  19.736103  12.828915  -90.842491   \n",
      "0  4851684.17 -0.018247  10.957604   9.997396  11.862558  -99.887006   \n",
      "0  8190902.35 -0.008227   7.342000   6.743000  27.409248   36.578171   \n",
      "0  6376268.69  0.032910  36.576444  33.808556  61.517448   47.947020   \n",
      "\n",
      "        dx_30  close_30_sma  close_60_sma  \n",
      "0   23.014040       15.8150       15.8150  \n",
      "0  100.000000       20.2000       20.2000  \n",
      "0  100.000000       10.4775       10.4775  \n",
      "0   64.934862        7.0425        7.0425  \n",
      "0  100.000000       35.1925       35.1925  \n"
     ]
    }
   ],
   "source": [
    "# Rich display keeps the columns aligned; print() inside a labelled f-string\n",
    "# shifts the header row and wraps wide frames.\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "CnwNoBG5UXSQ",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "CnwNoBG5UXSQ",
    "outputId": "3bcf1c7a-e9de-4b92-fc7e-069904d9e6e0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train.shape: (16695, 17)\n"
     ]
    }
   ],
   "source": [
    "print(f\"train.shape: {train.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "joNhXi_ZUXId",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "joNhXi_ZUXId",
    "outputId": "460b9763-6b0f-4976-f772-4a9a7cda2255"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Stock Dimension: 15, State Space: 151\n"
     ]
    }
   ],
   "source": [
    "stock_dimension = len(train.tic.unique())\n",
    "\n",
    "# One scalar, plus for every stock two values and one value per indicator.\n",
    "# NOTE(review): presumably cash balance + (price, holdings) per stock - confirm against StockTradingEnv.\n",
    "state_space = 1 + stock_dimension * (2 + len(config.INDICATORS))\n",
    "\n",
    "print(f\"Stock Dimension: {stock_dimension}, State Space: {state_space}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "le09273cUmzH",
   "metadata": {
    "id": "le09273cUmzH"
   },
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "Npwpqkr7UpFF",
   "metadata": {
    "id": "Npwpqkr7UpFF"
   },
   "outputs": [],
   "source": [
    "# Settings handed to StockTradingEnv as keyword arguments.\n",
    "env_kwargs = {\n",
    "    \"stock_dim\": stock_dimension,\n",
    "    \"hmax\": 1000,\n",
    "    \"initial_amount\": 1000000,\n",
    "    \"buy_cost_pct\": 6.87e-5,\n",
    "    \"sell_cost_pct\": 1.0687e-3,\n",
    "    \"reward_scaling\": 1e-4,\n",
    "    \"state_space\": state_space,\n",
    "    \"action_space\": stock_dimension,\n",
    "    \"tech_indicator_list\": config.INDICATORS,\n",
    "    \"print_verbosity\": 1,\n",
    "    \"initial_buy\": True,\n",
    "    \"hundred_each_trade\": True,\n",
    "}\n",
    "\n",
    "# Training environment built on the training split only.\n",
    "e_train_gym = StockTradingEnv(df=train, **env_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1POZL3nUyDY",
   "metadata": {
    "id": "f1POZL3nUyDY"
   },
   "outputs": [],
   "source": [
    "# get_sb_env() returns a pair; only the first element (the environment) is needed here.\n",
    "env_train, _ = e_train_gym.get_sb_env()\n",
    "\n",
    "# Interpolate the type itself: a print() nested inside the f-string prints the\n",
    "# type on its own line and then formats its return value, None, into the message.\n",
    "print(f\"type(env_train): {type(env_train)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "QkY8sVWhU6PH",
   "metadata": {
    "id": "QkY8sVWhU6PH"
   },
   "source": [
    "### DDPG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "dLjEviBhUzuc",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "dLjEviBhUzuc",
    "outputId": "58226aaa-41dc-45ce-9f5c-1e5cb94d27a4"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'batch_size': 256, 'buffer_size': 50000, 'learning_rate': 0.0005, 'action_noise': NormalActionNoise(mu=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], sigma=[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1])}\n",
      "Using cpu device\n",
      "Logging to tensorboard_log/ddpg/ddpg_1\n",
      "Episode: 2\n",
      "day: 1112, episode: 2\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 2109049.67\n",
      "total_reward: 1109049.67\n",
      "total_cost: 12151.48\n",
      "total_trades: 16679\n",
      "Sharpe: 0.726\n",
      "=================================\n",
      "Episode: 3\n",
      "day: 1112, episode: 3\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1873632.31\n",
      "total_reward: 873632.31\n",
      "total_cost: 620.69\n",
      "total_trades: 16680\n",
      "Sharpe: 0.650\n",
      "=================================\n",
      "Episode: 4\n",
      "day: 1112, episode: 4\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1480411.95\n",
      "total_reward: 480411.95\n",
      "total_cost: 488.05\n",
      "total_trades: 16680\n",
      "Sharpe: 0.471\n",
      "=================================\n",
      "Episode: 5\n",
      "day: 1112, episode: 5\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1473792.94\n",
      "total_reward: 473792.94\n",
      "total_cost: 488.06\n",
      "total_trades: 16680\n",
      "Sharpe: 0.467\n",
      "=================================\n",
      "-----------------------------------\n",
      "| time/              |            |\n",
      "|    episodes        | 4          |\n",
      "|    fps             | 26         |\n",
      "|    time_elapsed    | 170        |\n",
      "|    total_timesteps | 4452       |\n",
      "| train/             |            |\n",
      "|    actor_loss      | -670       |\n",
      "|    critic_loss     | 1.54e+03   |\n",
      "|    learning_rate   | 0.0005     |\n",
      "|    n_updates       | 3339       |\n",
      "|    reward          | -1.7532761 |\n",
      "-----------------------------------\n",
      "Episode: 6\n",
      "day: 1112, episode: 6\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1459229.94\n",
      "total_reward: 459229.94\n",
      "total_cost: 488.06\n",
      "total_trades: 16680\n",
      "Sharpe: 0.456\n",
      "=================================\n",
      "Episode: 7\n",
      "day: 1112, episode: 7\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1479962.94\n",
      "total_reward: 479962.94\n",
      "total_cost: 488.06\n",
      "total_trades: 16680\n",
      "Sharpe: 0.471\n",
      "=================================\n",
      "Episode: 8\n",
      "day: 1112, episode: 8\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1485250.97\n",
      "total_reward: 485250.97\n",
      "total_cost: 488.03\n",
      "total_trades: 16680\n",
      "Sharpe: 0.474\n",
      "=================================\n",
      "Episode: 9\n",
      "day: 1112, episode: 9\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1493105.94\n",
      "total_reward: 493105.94\n",
      "total_cost: 488.06\n",
      "total_trades: 16680\n",
      "Sharpe: 0.480\n",
      "=================================\n",
      "-----------------------------------\n",
      "| time/              |            |\n",
      "|    episodes        | 8          |\n",
      "|    fps             | 24         |\n",
      "|    time_elapsed    | 369        |\n",
      "|    total_timesteps | 8904       |\n",
      "| train/             |            |\n",
      "|    actor_loss      | -806       |\n",
      "|    critic_loss     | 323        |\n",
      "|    learning_rate   | 0.0005     |\n",
      "|    n_updates       | 7791       |\n",
      "|    reward          | -1.8583821 |\n",
      "-----------------------------------\n",
      "Episode: 10\n",
      "day: 1112, episode: 10\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1474497.96\n",
      "total_reward: 474497.96\n",
      "total_cost: 488.04\n",
      "total_trades: 16680\n",
      "Sharpe: 0.466\n",
      "=================================\n"
     ]
    }
   ],
   "source": [
    "agent = DRLAgent(env=env_train)\n",
    "\n",
    "# Hyperparameters forwarded to the DDPG model.\n",
    "DDPG_PARAMS = {\n",
    "    \"batch_size\": 256,\n",
    "    \"buffer_size\": 50000,\n",
    "    \"learning_rate\": 0.0005,\n",
    "    \"action_noise\": \"normal\",\n",
    "}\n",
    "# Separate hidden-layer sizes for the `pi` and `qf` networks\n",
    "# (actor and critic in Stable-Baselines3 terminology).\n",
    "POLICY_KWARGS = {\"net_arch\": {\"pi\": [64, 64], \"qf\": [400, 300]}}\n",
    "\n",
    "model_ddpg = agent.get_model(\n",
    "    \"ddpg\", model_kwargs=DDPG_PARAMS, policy_kwargs=POLICY_KWARGS\n",
    ")\n",
    "\n",
    "trained_ddpg = agent.train_model(\n",
    "    model=model_ddpg, tb_log_name='ddpg', total_timesteps=10000\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ALJ1gqVmVEiU",
   "metadata": {
    "id": "ALJ1gqVmVEiU"
   },
   "source": [
    "### A2C"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "2F5qCGnNUzm7",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "2F5qCGnNUzm7",
    "outputId": "b8db239f-7d37-4587-c511-0f2dc4c6f273"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}\n",
      "Using cpu device\n",
      "Logging to tensorboard_log/a2c/a2c_1\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 251        |\n",
      "|    iterations         | 100        |\n",
      "|    time_elapsed       | 1          |\n",
      "|    total_timesteps    | 500        |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.3      |\n",
      "|    explained_variance | -0.0322    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 99         |\n",
      "|    policy_loss        | -2.66      |\n",
      "|    reward             | -0.5146969 |\n",
      "|    std                | 1          |\n",
      "|    value_loss         | 2.24       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 248       |\n",
      "|    iterations         | 200       |\n",
      "|    time_elapsed       | 4         |\n",
      "|    total_timesteps    | 1000      |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.3     |\n",
      "|    explained_variance | -0.021    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 199       |\n",
      "|    policy_loss        | 25.7      |\n",
      "|    reward             | 3.5938816 |\n",
      "|    std                | 1         |\n",
      "|    value_loss         | 2.07      |\n",
      "-------------------------------------\n",
      "Episode: 12\n",
      "day: 1112, episode: 12\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1020213.39\n",
      "total_reward: 20213.39\n",
      "total_cost: 56550.76\n",
      "total_trades: 16666\n",
      "Sharpe: 0.161\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 249       |\n",
      "|    iterations         | 300       |\n",
      "|    time_elapsed       | 6         |\n",
      "|    total_timesteps    | 1500      |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.3     |\n",
      "|    explained_variance | 0         |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 299       |\n",
      "|    policy_loss        | -12.4     |\n",
      "|    reward             | 0.6872746 |\n",
      "|    std                | 1         |\n",
      "|    value_loss         | 2.18      |\n",
      "-------------------------------------\n",
      "----------------------------------------\n",
      "| time/                 |              |\n",
      "|    fps                | 249          |\n",
      "|    iterations         | 400          |\n",
      "|    time_elapsed       | 8            |\n",
      "|    total_timesteps    | 2000         |\n",
      "| train/                |              |\n",
      "|    entropy_loss       | -21.3        |\n",
      "|    explained_variance | -0.00236     |\n",
      "|    learning_rate      | 0.0007       |\n",
      "|    n_updates          | 399          |\n",
      "|    policy_loss        | -73.6        |\n",
      "|    reward             | -0.016160956 |\n",
      "|    std                | 1            |\n",
      "|    value_loss         | 16           |\n",
      "----------------------------------------\n",
      "Episode: 13\n",
      "day: 1112, episode: 13\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1135176.66\n",
      "total_reward: 135176.66\n",
      "total_cost: 73436.34\n",
      "total_trades: 16660\n",
      "Sharpe: 0.290\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 247        |\n",
      "|    iterations         | 500        |\n",
      "|    time_elapsed       | 10         |\n",
      "|    total_timesteps    | 2500       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.4      |\n",
      "|    explained_variance | -0.538     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 499        |\n",
      "|    policy_loss        | 23.1       |\n",
      "|    reward             | -5.0200696 |\n",
      "|    std                | 1.01       |\n",
      "|    value_loss         | 2.38       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 244       |\n",
      "|    iterations         | 600       |\n",
      "|    time_elapsed       | 12        |\n",
      "|    total_timesteps    | 3000      |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.4     |\n",
      "|    explained_variance | 0.256     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 599       |\n",
      "|    policy_loss        | 91.8      |\n",
      "|    reward             | 1.2820133 |\n",
      "|    std                | 1.01      |\n",
      "|    value_loss         | 23.9      |\n",
      "-------------------------------------\n",
      "Episode: 14\n",
      "day: 1112, episode: 14\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1452599.94\n",
      "total_reward: 452599.94\n",
      "total_cost: 72781.06\n",
      "total_trades: 16668\n",
      "Sharpe: 0.420\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 244         |\n",
      "|    iterations         | 700         |\n",
      "|    time_elapsed       | 14          |\n",
      "|    total_timesteps    | 3500        |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.4       |\n",
      "|    explained_variance | -0.0446     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 699         |\n",
      "|    policy_loss        | -398        |\n",
      "|    reward             | -0.98048055 |\n",
      "|    std                | 1.01        |\n",
      "|    value_loss         | 424         |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 800        |\n",
      "|    time_elapsed       | 16         |\n",
      "|    total_timesteps    | 4000       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.5      |\n",
      "|    explained_variance | -0.922     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 799        |\n",
      "|    policy_loss        | -17.4      |\n",
      "|    reward             | 0.28415835 |\n",
      "|    std                | 1.01       |\n",
      "|    value_loss         | 0.873      |\n",
      "--------------------------------------\n",
      "Episode: 15\n",
      "day: 1112, episode: 15\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1721987.58\n",
      "total_reward: 721987.58\n",
      "total_cost: 101478.42\n",
      "total_trades: 16673\n",
      "Sharpe: 0.503\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 900       |\n",
      "|    time_elapsed       | 18        |\n",
      "|    total_timesteps    | 4500      |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.5     |\n",
      "|    explained_variance | 0.159     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 899       |\n",
      "|    policy_loss        | 161       |\n",
      "|    reward             | 1.2920502 |\n",
      "|    std                | 1.01      |\n",
      "|    value_loss         | 69.6      |\n",
      "-------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 1000        |\n",
      "|    time_elapsed       | 20          |\n",
      "|    total_timesteps    | 5000        |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.5       |\n",
      "|    explained_variance | -1.2        |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 999         |\n",
      "|    policy_loss        | 3.34        |\n",
      "|    reward             | -0.53188443 |\n",
      "|    std                | 1.01        |\n",
      "|    value_loss         | 0.634       |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 1100       |\n",
      "|    time_elapsed       | 22         |\n",
      "|    total_timesteps    | 5500       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.5      |\n",
      "|    explained_variance | 0.135      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 1099       |\n",
      "|    policy_loss        | -38.6      |\n",
      "|    reward             | -1.1616651 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 5.67       |\n",
      "--------------------------------------\n",
      "Episode: 16\n",
      "day: 1112, episode: 16\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1785713.64\n",
      "total_reward: 785713.64\n",
      "total_cost: 91212.36\n",
      "total_trades: 16673\n",
      "Sharpe: 0.561\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 242         |\n",
      "|    iterations         | 1200        |\n",
      "|    time_elapsed       | 24          |\n",
      "|    total_timesteps    | 6000        |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.5       |\n",
      "|    explained_variance | -1.44       |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 1199        |\n",
      "|    policy_loss        | -49.5       |\n",
      "|    reward             | -0.16244832 |\n",
      "|    std                | 1.02        |\n",
      "|    value_loss         | 5.77        |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 242         |\n",
      "|    iterations         | 1300        |\n",
      "|    time_elapsed       | 26          |\n",
      "|    total_timesteps    | 6500        |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.6       |\n",
      "|    explained_variance | -0.218      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 1299        |\n",
      "|    policy_loss        | -2.18       |\n",
      "|    reward             | -0.65589225 |\n",
      "|    std                | 1.02        |\n",
      "|    value_loss         | 5.23        |\n",
      "---------------------------------------\n",
      "Episode: 17\n",
      "day: 1112, episode: 17\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 656554.83\n",
      "total_reward: -343445.17\n",
      "total_cost: 94231.17\n",
      "total_trades: 16670\n",
      "Sharpe: -0.008\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 1400       |\n",
      "|    time_elapsed       | 28         |\n",
      "|    total_timesteps    | 7000       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | -0.0815    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 1399       |\n",
      "|    policy_loss        | -27.4      |\n",
      "|    reward             | 0.47324356 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 5.58       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 1500       |\n",
      "|    time_elapsed       | 30         |\n",
      "|    total_timesteps    | 7500       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.5      |\n",
      "|    explained_variance | -0.29      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 1499       |\n",
      "|    policy_loss        | -25.6      |\n",
      "|    reward             | 0.07530492 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 2.54       |\n",
      "--------------------------------------\n",
      "Episode: 18\n",
      "day: 1112, episode: 18\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 756839.88\n",
      "total_reward: -243160.12\n",
      "total_cost: 77117.12\n",
      "total_trades: 16667\n",
      "Sharpe: -0.039\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 242         |\n",
      "|    iterations         | 1600        |\n",
      "|    time_elapsed       | 33          |\n",
      "|    total_timesteps    | 8000        |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.5       |\n",
      "|    explained_variance | -0.353      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 1599        |\n",
      "|    policy_loss        | 13.3        |\n",
      "|    reward             | -0.86519796 |\n",
      "|    std                | 1.02        |\n",
      "|    value_loss         | 2.27        |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 1700       |\n",
      "|    time_elapsed       | 35         |\n",
      "|    total_timesteps    | 8500       |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | 0.0211     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 1699       |\n",
      "|    policy_loss        | 42.4       |\n",
      "|    reward             | 0.19230042 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 6.29       |\n",
      "--------------------------------------\n",
      "Episode: 19\n",
      "day: 1112, episode: 19\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 677799.00\n",
      "total_reward: -322201.00\n",
      "total_cost: 26499.00\n",
      "total_trades: 16673\n",
      "Sharpe: -0.227\n",
      "=================================\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 242      |\n",
      "|    iterations         | 1800     |\n",
      "|    time_elapsed       | 37       |\n",
      "|    total_timesteps    | 9000     |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -21.6    |\n",
      "|    explained_variance | -0.724   |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 1799     |\n",
      "|    policy_loss        | 63.7     |\n",
      "|    reward             | 3.120001 |\n",
      "|    std                | 1.02     |\n",
      "|    value_loss         | 23       |\n",
      "------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 243       |\n",
      "|    iterations         | 1900      |\n",
      "|    time_elapsed       | 39        |\n",
      "|    total_timesteps    | 9500      |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | 0         |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 1899      |\n",
      "|    policy_loss        | -42.6     |\n",
      "|    reward             | 2.0565643 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 3.17      |\n",
      "-------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 243        |\n",
      "|    iterations         | 2000       |\n",
      "|    time_elapsed       | 41         |\n",
      "|    total_timesteps    | 10000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | 0.0867     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 1999       |\n",
      "|    policy_loss        | 48.8       |\n",
      "|    reward             | 0.42696014 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 7.17       |\n",
      "--------------------------------------\n",
      "Episode: 20\n",
      "day: 1112, episode: 20\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1162416.81\n",
      "total_reward: 162416.81\n",
      "total_cost: 61973.19\n",
      "total_trades: 16671\n",
      "Sharpe: 0.263\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 244        |\n",
      "|    iterations         | 2100       |\n",
      "|    time_elapsed       | 43         |\n",
      "|    total_timesteps    | 10500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | -0.596     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 2099       |\n",
      "|    policy_loss        | -3.97      |\n",
      "|    reward             | 0.03327369 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 0.487      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 244         |\n",
      "|    iterations         | 2200        |\n",
      "|    time_elapsed       | 45          |\n",
      "|    total_timesteps    | 11000       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.6       |\n",
      "|    explained_variance | 0.055       |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 2199        |\n",
      "|    policy_loss        | 52.4        |\n",
      "|    reward             | -0.16659752 |\n",
      "|    std                | 1.02        |\n",
      "|    value_loss         | 8.07        |\n",
      "---------------------------------------\n",
      "Episode: 21\n",
      "day: 1112, episode: 21\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1250840.39\n",
      "total_reward: 250840.39\n",
      "total_cost: 51180.61\n",
      "total_trades: 16673\n",
      "Sharpe: 0.320\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 243        |\n",
      "|    iterations         | 2300       |\n",
      "|    time_elapsed       | 47         |\n",
      "|    total_timesteps    | 11500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | 0.0503     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 2299       |\n",
      "|    policy_loss        | 22.3       |\n",
      "|    reward             | -0.5018876 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 1.37       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 243        |\n",
      "|    iterations         | 2400       |\n",
      "|    time_elapsed       | 49         |\n",
      "|    total_timesteps    | 12000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | -0.175     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 2399       |\n",
      "|    policy_loss        | 50.1       |\n",
      "|    reward             | -2.8187664 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 7.04       |\n",
      "--------------------------------------\n",
      "Episode: 22\n",
      "day: 1112, episode: 22\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1328636.65\n",
      "total_reward: 328636.65\n",
      "total_cost: 74018.35\n",
      "total_trades: 16673\n",
      "Sharpe: 0.373\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 242       |\n",
      "|    iterations         | 2500      |\n",
      "|    time_elapsed       | 51        |\n",
      "|    total_timesteps    | 12500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.7     |\n",
      "|    explained_variance | -0.57     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 2499      |\n",
      "|    policy_loss        | 11.8      |\n",
      "|    reward             | -3.533615 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 1.71      |\n",
      "-------------------------------------\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 241      |\n",
      "|    iterations         | 2600     |\n",
      "|    time_elapsed       | 53       |\n",
      "|    total_timesteps    | 13000    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -21.6    |\n",
      "|    explained_variance | 0.019    |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 2599     |\n",
      "|    policy_loss        | 38.6     |\n",
      "|    reward             | -4.54127 |\n",
      "|    std                | 1.02     |\n",
      "|    value_loss         | 5.79     |\n",
      "------------------------------------\n",
      "Episode: 23\n",
      "day: 1112, episode: 23\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1481294.41\n",
      "total_reward: 481294.41\n",
      "total_cost: 84745.59\n",
      "total_trades: 16670\n",
      "Sharpe: 0.466\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 2700      |\n",
      "|    time_elapsed       | 55        |\n",
      "|    total_timesteps    | 13500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | -0.186    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 2699      |\n",
      "|    policy_loss        | -4.68     |\n",
      "|    reward             | 4.4285936 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 3.12      |\n",
      "-------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 2800      |\n",
      "|    time_elapsed       | 58        |\n",
      "|    total_timesteps    | 14000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.7     |\n",
      "|    explained_variance | -0.805    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 2799      |\n",
      "|    policy_loss        | 17.8      |\n",
      "|    reward             | 0.2522968 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 1.26      |\n",
      "-------------------------------------\n",
      "Episode: 24\n",
      "day: 1112, episode: 24\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1055641.36\n",
      "total_reward: 55641.36\n",
      "total_cost: 71306.64\n",
      "total_trades: 16671\n",
      "Sharpe: 0.175\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 2900        |\n",
      "|    time_elapsed       | 60          |\n",
      "|    total_timesteps    | 14500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.7       |\n",
      "|    explained_variance | 5.96e-08    |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 2899        |\n",
      "|    policy_loss        | 5.2         |\n",
      "|    reward             | -0.13718218 |\n",
      "|    std                | 1.03        |\n",
      "|    value_loss         | 0.268       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 3000        |\n",
      "|    time_elapsed       | 62          |\n",
      "|    total_timesteps    | 15000       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.7       |\n",
      "|    explained_variance | -0.0745     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 2999        |\n",
      "|    policy_loss        | 5.71        |\n",
      "|    reward             | -0.21766275 |\n",
      "|    std                | 1.03        |\n",
      "|    value_loss         | 0.702       |\n",
      "---------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 3100      |\n",
      "|    time_elapsed       | 64        |\n",
      "|    total_timesteps    | 15500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | -0.148    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 3099      |\n",
      "|    policy_loss        | -109      |\n",
      "|    reward             | 1.8754544 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 37.1      |\n",
      "-------------------------------------\n",
      "Episode: 25\n",
      "day: 1112, episode: 25\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1788462.53\n",
      "total_reward: 788462.53\n",
      "total_cost: 61901.47\n",
      "total_trades: 16670\n",
      "Sharpe: 0.602\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 3200       |\n",
      "|    time_elapsed       | 66         |\n",
      "|    total_timesteps    | 16000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | 0.0183     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3199       |\n",
      "|    policy_loss        | -71.5      |\n",
      "|    reward             | 0.35727146 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 12.4       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 3300      |\n",
      "|    time_elapsed       | 68        |\n",
      "|    total_timesteps    | 16500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | 0         |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 3299      |\n",
      "|    policy_loss        | -61.9     |\n",
      "|    reward             | -0.178148 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 10.3      |\n",
      "-------------------------------------\n",
      "Episode: 26\n",
      "day: 1112, episode: 26\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1101713.43\n",
      "total_reward: 101713.43\n",
      "total_cost: 69265.57\n",
      "total_trades: 16670\n",
      "Sharpe: 0.269\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 3400        |\n",
      "|    time_elapsed       | 70          |\n",
      "|    total_timesteps    | 17000       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.7       |\n",
      "|    explained_variance | -0.0763     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 3399        |\n",
      "|    policy_loss        | 24.7        |\n",
      "|    reward             | -0.15418828 |\n",
      "|    std                | 1.03        |\n",
      "|    value_loss         | 2.64        |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 3500        |\n",
      "|    time_elapsed       | 72          |\n",
      "|    total_timesteps    | 17500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.7       |\n",
      "|    explained_variance | -0.245      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 3499        |\n",
      "|    policy_loss        | -2.81       |\n",
      "|    reward             | -0.27856484 |\n",
      "|    std                | 1.03        |\n",
      "|    value_loss         | 3.24        |\n",
      "---------------------------------------\n",
      "Episode: 27\n",
      "day: 1112, episode: 27\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 844132.69\n",
      "total_reward: -155867.31\n",
      "total_cost: 83509.31\n",
      "total_trades: 16670\n",
      "Sharpe: 0.101\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 3600       |\n",
      "|    time_elapsed       | 74         |\n",
      "|    total_timesteps    | 18000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | -0.00344   |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3599       |\n",
      "|    policy_loss        | 164        |\n",
      "|    reward             | -1.6985158 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 84.3       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 3700       |\n",
      "|    time_elapsed       | 76         |\n",
      "|    total_timesteps    | 18500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | 0.0165     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3699       |\n",
      "|    policy_loss        | 56.2       |\n",
      "|    reward             | 0.91595215 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 6.95       |\n",
      "--------------------------------------\n",
      "Episode: 28\n",
      "day: 1112, episode: 28\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1386483.87\n",
      "total_reward: 386483.87\n",
      "total_cost: 95005.13\n",
      "total_trades: 16674\n",
      "Sharpe: 0.386\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 240        |\n",
      "|    iterations         | 3800       |\n",
      "|    time_elapsed       | 78         |\n",
      "|    total_timesteps    | 19000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | -0.0153    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3799       |\n",
      "|    policy_loss        | -12        |\n",
      "|    reward             | 0.58676416 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 1.78       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 240        |\n",
      "|    iterations         | 3900       |\n",
      "|    time_elapsed       | 81         |\n",
      "|    total_timesteps    | 19500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | 0.202      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3899       |\n",
      "|    policy_loss        | 17         |\n",
      "|    reward             | -2.4740417 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 1.34       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 240        |\n",
      "|    iterations         | 4000       |\n",
      "|    time_elapsed       | 83         |\n",
      "|    total_timesteps    | 20000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | 0.0463     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 3999       |\n",
      "|    policy_loss        | -129       |\n",
      "|    reward             | -1.3204368 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 43.6       |\n",
      "--------------------------------------\n",
      "Episode: 29\n",
      "day: 1112, episode: 29\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1586191.10\n",
      "total_reward: 586191.10\n",
      "total_cost: 80097.90\n",
      "total_trades: 16668\n",
      "Sharpe: 0.493\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 240       |\n",
      "|    iterations         | 4100      |\n",
      "|    time_elapsed       | 85        |\n",
      "|    total_timesteps    | 20500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.7     |\n",
      "|    explained_variance | -1.72     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 4099      |\n",
      "|    policy_loss        | -22.7     |\n",
      "|    reward             | 1.4980443 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 1.58      |\n",
      "-------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 240       |\n",
      "|    iterations         | 4200      |\n",
      "|    time_elapsed       | 87        |\n",
      "|    total_timesteps    | 21000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.7     |\n",
      "|    explained_variance | -0.532    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 4199      |\n",
      "|    policy_loss        | -11.4     |\n",
      "|    reward             | -5.787773 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 3.18      |\n",
      "-------------------------------------\n",
      "Episode: 30\n",
      "day: 1112, episode: 30\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1777447.38\n",
      "total_reward: 777447.38\n",
      "total_cost: 90888.62\n",
      "total_trades: 16670\n",
      "Sharpe: 0.547\n",
      "=================================\n",
      "----------------------------------------\n",
      "| time/                 |              |\n",
      "|    fps                | 240          |\n",
      "|    iterations         | 4300         |\n",
      "|    time_elapsed       | 89           |\n",
      "|    total_timesteps    | 21500        |\n",
      "| train/                |              |\n",
      "|    entropy_loss       | -21.6        |\n",
      "|    explained_variance | 0.133        |\n",
      "|    learning_rate      | 0.0007       |\n",
      "|    n_updates          | 4299         |\n",
      "|    policy_loss        | -22.7        |\n",
      "|    reward             | -0.011666544 |\n",
      "|    std                | 1.02         |\n",
      "|    value_loss         | 1.85         |\n",
      "----------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 240       |\n",
      "|    iterations         | 4400      |\n",
      "|    time_elapsed       | 91        |\n",
      "|    total_timesteps    | 22000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | -0.0931   |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 4399      |\n",
      "|    policy_loss        | -87.3     |\n",
      "|    reward             | 4.1830454 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 51        |\n",
      "-------------------------------------\n",
      "Episode: 31\n",
      "day: 1112, episode: 31\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1375149.07\n",
      "total_reward: 375149.07\n",
      "total_cost: 70406.93\n",
      "total_trades: 16670\n",
      "Sharpe: 0.385\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 240        |\n",
      "|    iterations         | 4500       |\n",
      "|    time_elapsed       | 93         |\n",
      "|    total_timesteps    | 22500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | 0.0574     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 4499       |\n",
      "|    policy_loss        | -11.7      |\n",
      "|    reward             | 0.38482484 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 1.46       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 240        |\n",
      "|    iterations         | 4600       |\n",
      "|    time_elapsed       | 95         |\n",
      "|    total_timesteps    | 23000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | -0.0213    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 4599       |\n",
      "|    policy_loss        | 126        |\n",
      "|    reward             | -3.5586853 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 42.1       |\n",
      "--------------------------------------\n",
      "Episode: 32\n",
      "day: 1112, episode: 32\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1483978.74\n",
      "total_reward: 483978.74\n",
      "total_cost: 78944.26\n",
      "total_trades: 16676\n",
      "Sharpe: 0.432\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 240       |\n",
      "|    iterations         | 4700      |\n",
      "|    time_elapsed       | 97        |\n",
      "|    total_timesteps    | 23500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.5     |\n",
      "|    explained_variance | 0.164     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 4699      |\n",
      "|    policy_loss        | -13.4     |\n",
      "|    reward             | 2.8764431 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 7.06      |\n",
      "-------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 4800        |\n",
      "|    time_elapsed       | 99          |\n",
      "|    total_timesteps    | 24000       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -21.5       |\n",
      "|    explained_variance | 0.392       |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 4799        |\n",
      "|    policy_loss        | 5.01        |\n",
      "|    reward             | -0.35039684 |\n",
      "|    std                | 1.02        |\n",
      "|    value_loss         | 0.0779      |\n",
      "---------------------------------------\n",
      "Episode: 33\n",
      "day: 1112, episode: 33\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1060066.38\n",
      "total_reward: 60066.38\n",
      "total_cost: 45222.62\n",
      "total_trades: 16678\n",
      "Sharpe: 0.240\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 4900       |\n",
      "|    time_elapsed       | 101        |\n",
      "|    total_timesteps    | 24500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.5      |\n",
      "|    explained_variance | 0          |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 4899       |\n",
      "|    policy_loss        | 12.5       |\n",
      "|    reward             | -0.8052268 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 0.379      |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 5000      |\n",
      "|    time_elapsed       | 103       |\n",
      "|    total_timesteps    | 25000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.6     |\n",
      "|    explained_variance | -0.0739   |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 4999      |\n",
      "|    policy_loss        | -18.7     |\n",
      "|    reward             | 1.3659521 |\n",
      "|    std                | 1.02      |\n",
      "|    value_loss         | 1.24      |\n",
      "-------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5100       |\n",
      "|    time_elapsed       | 105        |\n",
      "|    total_timesteps    | 25500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.6      |\n",
      "|    explained_variance | -0.239     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5099       |\n",
      "|    policy_loss        | 133        |\n",
      "|    reward             | -1.5272913 |\n",
      "|    std                | 1.02       |\n",
      "|    value_loss         | 50.9       |\n",
      "--------------------------------------\n",
      "Episode: 34\n",
      "day: 1112, episode: 34\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1378748.53\n",
      "total_reward: 378748.53\n",
      "total_cost: 42573.47\n",
      "total_trades: 16679\n",
      "Sharpe: 0.388\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5200       |\n",
      "|    time_elapsed       | 107        |\n",
      "|    total_timesteps    | 26000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | 0.000385   |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5199       |\n",
      "|    policy_loss        | 2.25       |\n",
      "|    reward             | 0.55412763 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 0.234      |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5300       |\n",
      "|    time_elapsed       | 109        |\n",
      "|    total_timesteps    | 26500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | 0          |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5299       |\n",
      "|    policy_loss        | -51.8      |\n",
      "|    reward             | -1.1033877 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 10.4       |\n",
      "--------------------------------------\n",
      "Episode: 35\n",
      "day: 1112, episode: 35\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1045888.73\n",
      "total_reward: 45888.73\n",
      "total_cost: 54739.27\n",
      "total_trades: 16677\n",
      "Sharpe: 0.190\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5400       |\n",
      "|    time_elapsed       | 111        |\n",
      "|    total_timesteps    | 27000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.7      |\n",
      "|    explained_variance | -0.011     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5399       |\n",
      "|    policy_loss        | 24.7       |\n",
      "|    reward             | 0.15243596 |\n",
      "|    std                | 1.03       |\n",
      "|    value_loss         | 2.49       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 5500      |\n",
      "|    time_elapsed       | 113       |\n",
      "|    total_timesteps    | 27500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.7     |\n",
      "|    explained_variance | 0.0326    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 5499      |\n",
      "|    policy_loss        | 30.9      |\n",
      "|    reward             | -2.938769 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 4.05      |\n",
      "-------------------------------------\n",
      "Episode: 36\n",
      "day: 1112, episode: 36\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1183880.36\n",
      "total_reward: 183880.36\n",
      "total_cost: 69766.64\n",
      "total_trades: 16674\n",
      "Sharpe: 0.295\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 5600      |\n",
      "|    time_elapsed       | 116       |\n",
      "|    total_timesteps    | 28000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.8     |\n",
      "|    explained_variance | 0.0681    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 5599      |\n",
      "|    policy_loss        | -42.8     |\n",
      "|    reward             | 2.8964598 |\n",
      "|    std                | 1.03      |\n",
      "|    value_loss         | 7.82      |\n",
      "-------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 5700      |\n",
      "|    time_elapsed       | 118       |\n",
      "|    total_timesteps    | 28500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.8     |\n",
      "|    explained_variance | -0.00905  |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 5699      |\n",
      "|    policy_loss        | 51.1      |\n",
      "|    reward             | 1.4617499 |\n",
      "|    std                | 1.04      |\n",
      "|    value_loss         | 12.4      |\n",
      "-------------------------------------\n",
      "Episode: 37\n",
      "day: 1112, episode: 37\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1466201.61\n",
      "total_reward: 466201.61\n",
      "total_cost: 64320.39\n",
      "total_trades: 16672\n",
      "Sharpe: 0.425\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5800       |\n",
      "|    time_elapsed       | 120        |\n",
      "|    total_timesteps    | 29000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.8      |\n",
      "|    explained_variance | -0.00422   |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5799       |\n",
      "|    policy_loss        | 62.2       |\n",
      "|    reward             | -1.0605353 |\n",
      "|    std                | 1.04       |\n",
      "|    value_loss         | 17.1       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 5900       |\n",
      "|    time_elapsed       | 122        |\n",
      "|    total_timesteps    | 29500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.8      |\n",
      "|    explained_variance | 0.0639     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 5899       |\n",
      "|    policy_loss        | 52.5       |\n",
      "|    reward             | -1.0755574 |\n",
      "|    std                | 1.04       |\n",
      "|    value_loss         | 7.3        |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| time/                 |              |\n",
      "|    fps                | 241          |\n",
      "|    iterations         | 6000         |\n",
      "|    time_elapsed       | 124          |\n",
      "|    total_timesteps    | 30000        |\n",
      "| train/                |              |\n",
      "|    entropy_loss       | -21.9        |\n",
      "|    explained_variance | -0.0229      |\n",
      "|    learning_rate      | 0.0007       |\n",
      "|    n_updates          | 5999         |\n",
      "|    policy_loss        | -54.6        |\n",
      "|    reward             | -0.076917104 |\n",
      "|    std                | 1.04         |\n",
      "|    value_loss         | 13           |\n",
      "----------------------------------------\n",
      "Episode: 38\n",
      "day: 1112, episode: 38\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1501477.52\n",
      "total_reward: 501477.52\n",
      "total_cost: 55184.48\n",
      "total_trades: 16676\n",
      "Sharpe: 0.439\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 6100       |\n",
      "|    time_elapsed       | 126        |\n",
      "|    total_timesteps    | 30500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.9      |\n",
      "|    explained_variance | 0.0975     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 6099       |\n",
      "|    policy_loss        | -50.8      |\n",
      "|    reward             | -0.4812304 |\n",
      "|    std                | 1.04       |\n",
      "|    value_loss         | 5.22       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 6200      |\n",
      "|    time_elapsed       | 128       |\n",
      "|    total_timesteps    | 31000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -21.9     |\n",
      "|    explained_variance | 0.15      |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 6199      |\n",
      "|    policy_loss        | -9.12     |\n",
      "|    reward             | 3.5098653 |\n",
      "|    std                | 1.04      |\n",
      "|    value_loss         | 2.78      |\n",
      "-------------------------------------\n",
      "Episode: 39\n",
      "day: 1112, episode: 39\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1426594.15\n",
      "total_reward: 426594.15\n",
      "total_cost: 78575.85\n",
      "total_trades: 16675\n",
      "Sharpe: 0.408\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 6300       |\n",
      "|    time_elapsed       | 130        |\n",
      "|    total_timesteps    | 31500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.9      |\n",
      "|    explained_variance | -3.56      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 6299       |\n",
      "|    policy_loss        | -0.961     |\n",
      "|    reward             | -0.2890849 |\n",
      "|    std                | 1.04       |\n",
      "|    value_loss         | 0.115      |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 6400       |\n",
      "|    time_elapsed       | 132        |\n",
      "|    total_timesteps    | 32000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -21.9      |\n",
      "|    explained_variance | 0.0332     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 6399       |\n",
      "|    policy_loss        | 72.6       |\n",
      "|    reward             | -1.0264957 |\n",
      "|    std                | 1.05       |\n",
      "|    value_loss         | 12.1       |\n",
      "--------------------------------------\n",
      "Episode: 40\n",
      "day: 1112, episode: 40\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 476475.18\n",
      "total_reward: -523524.82\n",
      "total_cost: 78162.82\n",
      "total_trades: 16674\n",
      "Sharpe: -0.055\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 6500       |\n",
      "|    time_elapsed       | 134        |\n",
      "|    total_timesteps    | 32500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22        |\n",
      "|    explained_variance | 0.0456     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 6499       |\n",
      "|    policy_loss        | 6.29       |\n",
      "|    reward             | -2.7645295 |\n",
      "|    std                | 1.05       |\n",
      "|    value_loss         | 1.9        |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 6600      |\n",
      "|    time_elapsed       | 136       |\n",
      "|    total_timesteps    | 33000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22       |\n",
      "|    explained_variance | -0.0041   |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 6599      |\n",
      "|    policy_loss        | -59.1     |\n",
      "|    reward             | 0.7839271 |\n",
      "|    std                | 1.05      |\n",
      "|    value_loss         | 17.1      |\n",
      "-------------------------------------\n",
      "Episode: 41\n",
      "day: 1112, episode: 41\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1851615.76\n",
      "total_reward: 851615.76\n",
      "total_cost: 99163.24\n",
      "total_trades: 16678\n",
      "Sharpe: 0.573\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 6700      |\n",
      "|    time_elapsed       | 138       |\n",
      "|    total_timesteps    | 33500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22       |\n",
      "|    explained_variance | 0.65      |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 6699      |\n",
      "|    policy_loss        | -113      |\n",
      "|    reward             | 2.1288087 |\n",
      "|    std                | 1.05      |\n",
      "|    value_loss         | 25.3      |\n",
      "-------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 6800       |\n",
      "|    time_elapsed       | 140        |\n",
      "|    total_timesteps    | 34000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.1      |\n",
      "|    explained_variance | -0.799     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 6799       |\n",
      "|    policy_loss        | -31.5      |\n",
      "|    reward             | 0.22091949 |\n",
      "|    std                | 1.05       |\n",
      "|    value_loss         | 2.54       |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 6900        |\n",
      "|    time_elapsed       | 142         |\n",
      "|    total_timesteps    | 34500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.1       |\n",
      "|    explained_variance | -0.113      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 6899        |\n",
      "|    policy_loss        | 5.26        |\n",
      "|    reward             | -0.18549353 |\n",
      "|    std                | 1.06        |\n",
      "|    value_loss         | 0.444       |\n",
      "---------------------------------------\n",
      "Episode: 42\n",
      "day: 1112, episode: 42\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 720810.70\n",
      "total_reward: -279189.30\n",
      "total_cost: 72394.30\n",
      "total_trades: 16675\n",
      "Sharpe: 0.099\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 7000      |\n",
      "|    time_elapsed       | 145       |\n",
      "|    total_timesteps    | 35000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.1     |\n",
      "|    explained_variance | -0.109    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 6999      |\n",
      "|    policy_loss        | -28.1     |\n",
      "|    reward             | 0.7332911 |\n",
      "|    std                | 1.06      |\n",
      "|    value_loss         | 2.49      |\n",
      "-------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 7100      |\n",
      "|    time_elapsed       | 147       |\n",
      "|    total_timesteps    | 35500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.1     |\n",
      "|    explained_variance | 0.0755    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 7099      |\n",
      "|    policy_loss        | -18.7     |\n",
      "|    reward             | 4.0186872 |\n",
      "|    std                | 1.06      |\n",
      "|    value_loss         | 7.08      |\n",
      "-------------------------------------\n",
      "Episode: 43\n",
      "day: 1112, episode: 43\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1398182.60\n",
      "total_reward: 398182.60\n",
      "total_cost: 79516.40\n",
      "total_trades: 16675\n",
      "Sharpe: 0.396\n",
      "=================================\n",
      "----------------------------------------\n",
      "| time/                 |              |\n",
      "|    fps                | 241          |\n",
      "|    iterations         | 7200         |\n",
      "|    time_elapsed       | 149          |\n",
      "|    total_timesteps    | 36000        |\n",
      "| train/                |              |\n",
      "|    entropy_loss       | -22.1        |\n",
      "|    explained_variance | -0.33        |\n",
      "|    learning_rate      | 0.0007       |\n",
      "|    n_updates          | 7199         |\n",
      "|    policy_loss        | -6.51        |\n",
      "|    reward             | -0.009883001 |\n",
      "|    std                | 1.06         |\n",
      "|    value_loss         | 0.204        |\n",
      "----------------------------------------\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 241      |\n",
      "|    iterations         | 7300     |\n",
      "|    time_elapsed       | 151      |\n",
      "|    total_timesteps    | 36500    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -22.2    |\n",
      "|    explained_variance | -0.00478 |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 7299     |\n",
      "|    policy_loss        | 50.9     |\n",
      "|    reward             | 2.793739 |\n",
      "|    std                | 1.06     |\n",
      "|    value_loss         | 7.97     |\n",
      "------------------------------------\n",
      "Episode: 44\n",
      "day: 1112, episode: 44\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 600826.50\n",
      "total_reward: -399173.50\n",
      "total_cost: 77926.50\n",
      "total_trades: 16678\n",
      "Sharpe: 0.133\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 7400      |\n",
      "|    time_elapsed       | 153       |\n",
      "|    total_timesteps    | 37000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.2     |\n",
      "|    explained_variance | 0.0186    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 7399      |\n",
      "|    policy_loss        | -3.51     |\n",
      "|    reward             | 1.4254856 |\n",
      "|    std                | 1.06      |\n",
      "|    value_loss         | 0.285     |\n",
      "-------------------------------------\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 241      |\n",
      "|    iterations         | 7500     |\n",
      "|    time_elapsed       | 155      |\n",
      "|    total_timesteps    | 37500    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -22.2    |\n",
      "|    explained_variance | -0.0104  |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 7499     |\n",
      "|    policy_loss        | 24.3     |\n",
      "|    reward             | 4.622466 |\n",
      "|    std                | 1.06     |\n",
      "|    value_loss         | 3.63     |\n",
      "------------------------------------\n",
      "Episode: 45\n",
      "day: 1112, episode: 45\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1564982.73\n",
      "total_reward: 564982.73\n",
      "total_cost: 84114.27\n",
      "total_trades: 16676\n",
      "Sharpe: 0.468\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 7600      |\n",
      "|    time_elapsed       | 157       |\n",
      "|    total_timesteps    | 38000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.2     |\n",
      "|    explained_variance | 0.00967   |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 7599      |\n",
      "|    policy_loss        | -170      |\n",
      "|    reward             | 1.3509696 |\n",
      "|    std                | 1.06      |\n",
      "|    value_loss         | 71        |\n",
      "-------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 7700       |\n",
      "|    time_elapsed       | 159        |\n",
      "|    total_timesteps    | 38500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.2      |\n",
      "|    explained_variance | -0.00654   |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 7699       |\n",
      "|    policy_loss        | 38.4       |\n",
      "|    reward             | 0.61605316 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 5.05       |\n",
      "--------------------------------------\n",
      "Episode: 46\n",
      "day: 1112, episode: 46\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1095522.52\n",
      "total_reward: 95522.52\n",
      "total_cost: 61225.48\n",
      "total_trades: 16675\n",
      "Sharpe: 0.238\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 7800       |\n",
      "|    time_elapsed       | 161        |\n",
      "|    total_timesteps    | 39000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.2      |\n",
      "|    explained_variance | -0.0942    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 7799       |\n",
      "|    policy_loss        | 8.72       |\n",
      "|    reward             | -1.5902064 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 0.977      |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 242        |\n",
      "|    iterations         | 7900       |\n",
      "|    time_elapsed       | 163        |\n",
      "|    total_timesteps    | 39500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.2      |\n",
      "|    explained_variance | -1.12      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 7899       |\n",
      "|    policy_loss        | 7.73       |\n",
      "|    reward             | 0.30307877 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 0.12       |\n",
      "--------------------------------------\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 242       |\n",
      "|    iterations         | 8000      |\n",
      "|    time_elapsed       | 165       |\n",
      "|    total_timesteps    | 40000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.2     |\n",
      "|    explained_variance | 0.0763    |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 7999      |\n",
      "|    policy_loss        | 10.2      |\n",
      "|    reward             | 1.3091534 |\n",
      "|    std                | 1.07      |\n",
      "|    value_loss         | 2.45      |\n",
      "-------------------------------------\n",
      "Episode: 47\n",
      "day: 1112, episode: 47\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1261810.19\n",
      "total_reward: 261810.19\n",
      "total_cost: 71173.81\n",
      "total_trades: 16674\n",
      "Sharpe: 0.332\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 242       |\n",
      "|    iterations         | 8100      |\n",
      "|    time_elapsed       | 167       |\n",
      "|    total_timesteps    | 40500     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.2     |\n",
      "|    explained_variance | -0.0688   |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 8099      |\n",
      "|    policy_loss        | 34.7      |\n",
      "|    reward             | 0.4635085 |\n",
      "|    std                | 1.07      |\n",
      "|    value_loss         | 4.17      |\n",
      "-------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 8200       |\n",
      "|    time_elapsed       | 169        |\n",
      "|    total_timesteps    | 41000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.2      |\n",
      "|    explained_variance | -0.127     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 8199       |\n",
      "|    policy_loss        | -88.8      |\n",
      "|    reward             | -2.1892762 |\n",
      "|    std                | 1.06       |\n",
      "|    value_loss         | 17.9       |\n",
      "--------------------------------------\n",
      "Episode: 48\n",
      "day: 1112, episode: 48\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1277660.51\n",
      "total_reward: 277660.51\n",
      "total_cost: 65861.49\n",
      "total_trades: 16671\n",
      "Sharpe: 0.341\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 8300        |\n",
      "|    time_elapsed       | 171         |\n",
      "|    total_timesteps    | 41500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.2       |\n",
      "|    explained_variance | -0.0755     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 8299        |\n",
      "|    policy_loss        | -22.5       |\n",
      "|    reward             | -0.10433925 |\n",
      "|    std                | 1.06        |\n",
      "|    value_loss         | 1.22        |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 8400        |\n",
      "|    time_elapsed       | 173         |\n",
      "|    total_timesteps    | 42000       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.3       |\n",
      "|    explained_variance | -0.0394     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 8399        |\n",
      "|    policy_loss        | 67.8        |\n",
      "|    reward             | -0.40242904 |\n",
      "|    std                | 1.07        |\n",
      "|    value_loss         | 17.3        |\n",
      "---------------------------------------\n",
      "Episode: 49\n",
      "day: 1112, episode: 49\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 799815.66\n",
      "total_reward: -200184.34\n",
      "total_cost: 68632.34\n",
      "total_trades: 16679\n",
      "Sharpe: 0.076\n",
      "=================================\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 8500        |\n",
      "|    time_elapsed       | 175         |\n",
      "|    total_timesteps    | 42500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.3       |\n",
      "|    explained_variance | 0.0337      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 8499        |\n",
      "|    policy_loss        | 16.6        |\n",
      "|    reward             | -0.99479425 |\n",
      "|    std                | 1.07        |\n",
      "|    value_loss         | 2.73        |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 8600       |\n",
      "|    time_elapsed       | 177        |\n",
      "|    total_timesteps    | 43000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.3      |\n",
      "|    explained_variance | 0.0467     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 8599       |\n",
      "|    policy_loss        | 20.3       |\n",
      "|    reward             | -1.9225081 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 2.02       |\n",
      "--------------------------------------\n",
      "Episode: 50\n",
      "day: 1112, episode: 50\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1386600.30\n",
      "total_reward: 386600.30\n",
      "total_cost: 85194.70\n",
      "total_trades: 16678\n",
      "Sharpe: 0.392\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 8700       |\n",
      "|    time_elapsed       | 179        |\n",
      "|    total_timesteps    | 43500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.3      |\n",
      "|    explained_variance | 0.0324     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 8699       |\n",
      "|    policy_loss        | 118        |\n",
      "|    reward             | -11.669185 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 33.8       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 8800       |\n",
      "|    time_elapsed       | 181        |\n",
      "|    total_timesteps    | 44000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.3      |\n",
      "|    explained_variance | 0.118      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 8799       |\n",
      "|    policy_loss        | 48.3       |\n",
      "|    reward             | 0.06142468 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 8.04       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 8900       |\n",
      "|    time_elapsed       | 183        |\n",
      "|    total_timesteps    | 44500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.3      |\n",
      "|    explained_variance | 0.408      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 8899       |\n",
      "|    policy_loss        | 14.5       |\n",
      "|    reward             | -3.4698105 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 0.509      |\n",
      "--------------------------------------\n",
      "Episode: 51\n",
      "day: 1112, episode: 51\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1293779.12\n",
      "total_reward: 293779.12\n",
      "total_cost: 82795.88\n",
      "total_trades: 16676\n",
      "Sharpe: 0.345\n",
      "=================================\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 241      |\n",
      "|    iterations         | 9000     |\n",
      "|    time_elapsed       | 186      |\n",
      "|    total_timesteps    | 45000    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -22.3    |\n",
      "|    explained_variance | -0.0575  |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 8999     |\n",
      "|    policy_loss        | -21.2    |\n",
      "|    reward             | 0.581877 |\n",
      "|    std                | 1.07     |\n",
      "|    value_loss         | 1.7      |\n",
      "------------------------------------\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 242      |\n",
      "|    iterations         | 9100     |\n",
      "|    time_elapsed       | 188      |\n",
      "|    total_timesteps    | 45500    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -22.3    |\n",
      "|    explained_variance | -0.182   |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 9099     |\n",
      "|    policy_loss        | -2.07    |\n",
      "|    reward             | -2.94732 |\n",
      "|    std                | 1.07     |\n",
      "|    value_loss         | 4.61     |\n",
      "------------------------------------\n",
      "Episode: 52\n",
      "day: 1112, episode: 52\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1586408.84\n",
      "total_reward: 586408.84\n",
      "total_cost: 77611.16\n",
      "total_trades: 16670\n",
      "Sharpe: 0.482\n",
      "=================================\n",
      "-------------------------------------\n",
      "| time/                 |           |\n",
      "|    fps                | 241       |\n",
      "|    iterations         | 9200      |\n",
      "|    time_elapsed       | 190       |\n",
      "|    total_timesteps    | 46000     |\n",
      "| train/                |           |\n",
      "|    entropy_loss       | -22.4     |\n",
      "|    explained_variance | -1.43     |\n",
      "|    learning_rate      | 0.0007    |\n",
      "|    n_updates          | 9199      |\n",
      "|    policy_loss        | 17.8      |\n",
      "|    reward             | 2.3569686 |\n",
      "|    std                | 1.08      |\n",
      "|    value_loss         | 0.743     |\n",
      "-------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 9300        |\n",
      "|    time_elapsed       | 192         |\n",
      "|    total_timesteps    | 46500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.4       |\n",
      "|    explained_variance | -0.131      |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 9299        |\n",
      "|    policy_loss        | -11.8       |\n",
      "|    reward             | -0.31335056 |\n",
      "|    std                | 1.07        |\n",
      "|    value_loss         | 4.55        |\n",
      "---------------------------------------\n",
      "Episode: 53\n",
      "day: 1112, episode: 53\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1541023.35\n",
      "total_reward: 541023.35\n",
      "total_cost: 66428.65\n",
      "total_trades: 16672\n",
      "Sharpe: 0.459\n",
      "=================================\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 9400       |\n",
      "|    time_elapsed       | 194        |\n",
      "|    total_timesteps    | 47000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.4      |\n",
      "|    explained_variance | -0.0205    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 9399       |\n",
      "|    policy_loss        | -51.5      |\n",
      "|    reward             | -2.5907032 |\n",
      "|    std                | 1.08       |\n",
      "|    value_loss         | 11.2       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 9500       |\n",
      "|    time_elapsed       | 196        |\n",
      "|    total_timesteps    | 47500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.4      |\n",
      "|    explained_variance | -0.0172    |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 9499       |\n",
      "|    policy_loss        | -101       |\n",
      "|    reward             | -7.4422593 |\n",
      "|    std                | 1.08       |\n",
      "|    value_loss         | 24.9       |\n",
      "--------------------------------------\n",
      "Episode: 54\n",
      "day: 1112, episode: 54\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1223144.99\n",
      "total_reward: 223144.99\n",
      "total_cost: 78293.01\n",
      "total_trades: 16669\n",
      "Sharpe: 0.312\n",
      "=================================\n",
      "------------------------------------\n",
      "| time/                 |          |\n",
      "|    fps                | 241      |\n",
      "|    iterations         | 9600     |\n",
      "|    time_elapsed       | 198      |\n",
      "|    total_timesteps    | 48000    |\n",
      "| train/                |          |\n",
      "|    entropy_loss       | -22.4    |\n",
      "|    explained_variance | 0.00348  |\n",
      "|    learning_rate      | 0.0007   |\n",
      "|    n_updates          | 9599     |\n",
      "|    policy_loss        | -16.8    |\n",
      "|    reward             | -2.32945 |\n",
      "|    std                | 1.07     |\n",
      "|    value_loss         | 27.7     |\n",
      "------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 9700       |\n",
      "|    time_elapsed       | 200        |\n",
      "|    total_timesteps    | 48500      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.4      |\n",
      "|    explained_variance | 0.0378     |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 9699       |\n",
      "|    policy_loss        | 8.55       |\n",
      "|    reward             | 0.49679536 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 1.84       |\n",
      "--------------------------------------\n",
      "Episode: 55\n",
      "day: 1112, episode: 55\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 1292285.11\n",
      "total_reward: 292285.11\n",
      "total_cost: 93433.89\n",
      "total_trades: 16669\n",
      "Sharpe: 0.345\n",
      "=================================\n",
      "----------------------------------------\n",
      "| time/                 |              |\n",
      "|    fps                | 241          |\n",
      "|    iterations         | 9800         |\n",
      "|    time_elapsed       | 202          |\n",
      "|    total_timesteps    | 49000        |\n",
      "| train/                |              |\n",
      "|    entropy_loss       | -22.4        |\n",
      "|    explained_variance | -0.0646      |\n",
      "|    learning_rate      | 0.0007       |\n",
      "|    n_updates          | 9799         |\n",
      "|    policy_loss        | 14.9         |\n",
      "|    reward             | -0.023521164 |\n",
      "|    std                | 1.08         |\n",
      "|    value_loss         | 1.3          |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| time/                 |             |\n",
      "|    fps                | 241         |\n",
      "|    iterations         | 9900        |\n",
      "|    time_elapsed       | 204         |\n",
      "|    total_timesteps    | 49500       |\n",
      "| train/                |             |\n",
      "|    entropy_loss       | -22.4       |\n",
      "|    explained_variance | 0.00278     |\n",
      "|    learning_rate      | 0.0007      |\n",
      "|    n_updates          | 9899        |\n",
      "|    policy_loss        | -14.8       |\n",
      "|    reward             | -0.70955884 |\n",
      "|    std                | 1.08        |\n",
      "|    value_loss         | 0.908       |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| time/                 |            |\n",
      "|    fps                | 241        |\n",
      "|    iterations         | 10000      |\n",
      "|    time_elapsed       | 206        |\n",
      "|    total_timesteps    | 50000      |\n",
      "| train/                |            |\n",
      "|    entropy_loss       | -22.3      |\n",
      "|    explained_variance | 0.056      |\n",
      "|    learning_rate      | 0.0007     |\n",
      "|    n_updates          | 9999       |\n",
      "|    policy_loss        | 25.9       |\n",
      "|    reward             | -12.614292 |\n",
      "|    std                | 1.07       |\n",
      "|    value_loss         | 4.95       |\n",
      "--------------------------------------\n"
     ]
    }
   ],
   "source": [
    "agent = DRLAgent(env=env_train) \n",
    "model_a2c = agent.get_model(\"a2c\")\n",
    "\n",
    "trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=50000)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ArAnGULyVVfK",
   "metadata": {
    "id": "ArAnGULyVVfK"
   },
   "source": [
    "## Trade"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "TzU6JBAWVGPG",
   "metadata": {
    "id": "TzU6JBAWVGPG"
   },
   "outputs": [],
   "source": [
    "trade = p.data_split(p.dataframe, TRADE_START_DATE, TRADE_END_DATE) \n",
    "env_kwargs = { \"stock_dim\": stock_dimension, \"hmax\": 1000, \"initial_amount\": 1000000, \"buy_cost_pct\": 6.87e-5, \"sell_cost_pct\": 1.0687e-3, \"reward_scaling\": 1e-4, \"state_space\": state_space, \"action_space\": stock_dimension, \"tech_indicator_list\": config.INDICATORS, \"print_verbosity\": 1, \"initial_buy\": False, \"hundred_each_trade\": True } \n",
    "e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "fdg8qypiVSOn",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "fdg8qypiVSOn",
    "outputId": "af6f6967-7e06-41c5-850f-d0e2512ecd43"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode: 2\n",
      "day: 103, episode: 2\n",
      "begin_total_asset: 1000000.00\n",
      "end_total_asset: 952511.32\n",
      "total_reward: -47488.68\n",
      "total_cost: 68.68\n",
      "total_trades: 608\n",
      "Sharpe: -0.366\n",
      "=================================\n",
      "hit end!\n"
     ]
    }
   ],
   "source": [
    "df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_ddpg, environment=e_trade_gym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "Ih4rdH3uVSo1",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Ih4rdH3uVSo1",
    "outputId": "291d8234-ee54-4ef2-a511-1e084a783f4e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_actions:             600000.SH  600009.SH  600016.SH  600028.SH  600030.SH  600031.SH  \\\n",
      "date                                                                           \n",
      "2019-08-01          0          0       1000       1000          0       1000   \n",
      "2019-08-02          0          0       1000       1000          0       1000   \n",
      "2019-08-05          0          0       1000       1000          0       1000   \n",
      "2019-08-06          0          0       1000       1000          0       1000   \n",
      "2019-08-07          0          0       1000       1000          0       1000   \n",
      "...               ...        ...        ...        ...        ...        ...   \n",
      "2019-12-25          0          0          0          0          0          0   \n",
      "2019-12-26          0          0          0          0          0          0   \n",
      "2019-12-27          0          0          0          0          0          0   \n",
      "2019-12-30          0          0          0          0          0          0   \n",
      "2019-12-31          0          0          0          0          0          0   \n",
      "\n",
      "            600036.SH  600050.SH  600104.SH  600196.SH  600276.SH  600309.SH  \\\n",
      "date                                                                           \n",
      "2019-08-01       1000          0          0          0          0          0   \n",
      "2019-08-02       1000          0          0          0          0          0   \n",
      "2019-08-05       1000          0          0          0          0          0   \n",
      "2019-08-06       1000          0          0          0          0          0   \n",
      "2019-08-07       1000          0          0          0          0          0   \n",
      "...               ...        ...        ...        ...        ...        ...   \n",
      "2019-12-25          0          0          0          0          0          0   \n",
      "2019-12-26          0          0          0          0          0          0   \n",
      "2019-12-27          0          0          0          0          0          0   \n",
      "2019-12-30          0          0          0          0          0          0   \n",
      "2019-12-31          0          0          0          0          0          0   \n",
      "\n",
      "            600519.SH  600547.SH  600570.SH  \n",
      "date                                         \n",
      "2019-08-01          0       1000          0  \n",
      "2019-08-02          0       1000          0  \n",
      "2019-08-05          0       1000          0  \n",
      "2019-08-06          0       1000          0  \n",
      "2019-08-07          0       1000          0  \n",
      "...               ...        ...        ...  \n",
      "2019-12-25          0          0          0  \n",
      "2019-12-26          0          0          0  \n",
      "2019-12-27          0          0          0  \n",
      "2019-12-30          0          0          0  \n",
      "2019-12-31          0          0          0  \n",
      "\n",
      "[103 rows x 15 columns]\n"
     ]
    }
   ],
   "source": [
    "df_actions.to_csv(\"action.csv\", index=False) \n",
    "print(f\"df_actions: {df_actions}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "l7X1KIaVWUYp",
   "metadata": {
    "id": "l7X1KIaVWUYp"
   },
   "source": [
    "## Backtest"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "dUJn8einWPKI",
   "metadata": {
    "id": "dUJn8einWPKI"
   },
   "source": [
    "### matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "pR6hNouKWOoY",
   "metadata": {
    "id": "pR6hNouKWOoY"
   },
   "outputs": [],
   "source": [
    "plotter = ReturnPlotter(df_account_value, trade, TRADE_START_DATE, TRADE_END_DATE)\n",
    "plotter.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "Qx62Q575YC9I",
   "metadata": {
    "id": "Qx62Q575YC9I"
   },
   "outputs": [],
   "source": [
    "# ticket: SSE 50：000016\n",
    "plotter.plot(\"000016\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "XUAh2S9Lamxe",
   "metadata": {
    "id": "XUAh2S9Lamxe"
   },
   "source": [
    "### CSI 300"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "NJZdXMGvYI9O",
   "metadata": {
    "id": "NJZdXMGvYI9O"
   },
   "outputs": [],
   "source": [
    "baseline_df = plotter.get_baseline(\"399300\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ZSRJpKINYcBa",
   "metadata": {
    "id": "ZSRJpKINYcBa"
   },
   "outputs": [],
   "source": [
    "daily_return = plotter.get_return(df_account_value)\n",
    "daily_return_base = plotter.get_return(baseline_df, value_col_name=\"close\")\n",
    "\n",
    "perf_func = timeseries.perf_stats \n",
    "perf_stats_all = perf_func(returns=daily_return, factor_returns=daily_return_base, positions=None, transactions=None, turnover_denom=\"AGB\")\n",
    "print(\"==============DRL Strategy Stats===========\")\n",
    "print(f\"perf_stats_all: {perf_stats_all}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6J0LpdE7YuQe",
   "metadata": {
    "id": "6J0LpdE7YuQe"
   },
   "outputs": [],
   "source": [
    "daily_return = plotter.get_return(df_account_value)\n",
    "daily_return_base = plotter.get_return(baseline_df, value_col_name=\"close\")\n",
    "\n",
    "perf_func = timeseries.perf_stats\n",
    "perf_stats_all = perf_func(returns=daily_return_base, factor_returns=daily_return_base, positions=None, transactions=None, turnover_denom=\"AGB\")\n",
    "\n",
    "print(\"==============Baseline Strategy Stats===========\")\n",
    "\n",
    "print(f\"perf_stats_all: {perf_stats_all}\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "China_A_share_market_tushare.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "finrl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "notify_time": "5",
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  },
  "vscode": {
   "interpreter": {
    "hash": "afd6dc03c9be451573fc2885de79a969af6a24a159f11a3ead741ab7a9ff405f"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
